Antonio Coín Castro
# -- Libraries
from matplotlib import pyplot as plt
from matplotlib.lines import Line2D
import arviz as az
import numpy as np
from sklearn.svm import SVR
import pandas as pd
from IPython.display import display
import logging
import skfda
from _fpca_basis import FPCABasis
from itertools import product
from skfda.preprocessing.dim_reduction.variable_selection import (
RecursiveMaximaHunting as RMH,
MinimumRedundancyMaximumRelevance as mRMR,
)
import os
from skfda.preprocessing.dim_reduction.feature_extraction import FPCA
from skfda.ml.regression import KNeighborsRegressor
from skfda.ml.regression import LinearRegression as FLinearRegression
from skfda.representation.basis import FDataBasis, Fourier, BSpline
from skfda.representation.grid import FDataGrid
from skfda.preprocessing.smoothing import BasisSmoother
from skfda.preprocessing.smoothing.validation import (
SmoothingParameterSearch,
LinearSmootherGeneralizedCVScorer,
akaike_information_criterion
)
from skfda.preprocessing.smoothing.kernel_smoothers import (
NadarayaWatsonSmoother as NW
)
import warnings
from _fpls import FPLS, APLS, FPLSBasis
from sklearn.decomposition import PCA
from sklearn.cross_decomposition import PLSRegression
from sklearn.utils.validation import check_is_fitted
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.exceptions import ConvergenceWarning
import sys
import pickle
import scipy
from multiprocessing import Pool
import utils
from sklearn.linear_model import Ridge, Lasso, LinearRegression
from sklearn.base import BaseEstimator, TransformerMixin
from sklearn.model_selection import KFold
from sklearn.pipeline import Pipeline
# -- Configuration
# NOTE: the %-lines are IPython magics; this file only runs inside a notebook.
# Extensions
%load_ext autoreload
%autoreload 2
# Plotting configuration
%matplotlib inline
%config InlineBackend.figure_format = 'retina'
# Default figure size and arviz dark-grid style for all plots below
plt.rcParams['figure.figsize'] = [6, 4]
plt.style.use('arviz-darkgrid')
# Number of columns used for grids of subplots
NCOLS = 3


def NROWS(x, ncols=NCOLS):
    """Return the number of subplot rows needed to fit `x` plots in `ncols` columns."""
    rows_needed = np.ceil(x / ncols)
    return rows_needed.astype('int')
# Randomness and reproducibility
SEED = 42
np.random.seed(SEED)  # legacy global seed (for libraries that use np.random directly)
rng = np.random.default_rng(SEED)  # Generator instance passed around explicitly
# Floating point precision for display
np.set_printoptions(precision=3, suppress=True)
pd.set_option("display.precision", 3)
# Multiprocessing
N_CORES = 4  # worker processes for Pool and GridSearchCV
# Ignore warnings
np.seterr(over='ignore', divide='ignore')  # silence numpy overflow/division warnings
# Propagate the warning filter to subprocesses spawned by multiprocessing
os.environ["PYTHONWARNINGS"] = 'ignore::UserWarning'
We consider the model
$$ Y = \alpha_0 + \Psi^{-1}_{X}(\alpha) + \varepsilon, $$i.e.,
$$ Y_i\mid X_i=x_i \sim \mathcal N\left(\alpha_0 + \sum_{j=1}^p \beta_jx_i(\tau_j), \ \sigma^2\right). $$The prior distributions we choose are:
\begin{align*} \pi(\alpha_0, \sigma^2) & \propto 1/\sigma^2, \\ \tau & \sim \mathscr U([0, 1]^p), \\ \beta\mid \tau, \sigma^2 & \sim \mathcal N\left(b_0, g\sigma^2\left[\mathcal X_\tau' \mathcal X_\tau + \eta \lambda_{\text{max}}(\mathcal X_\tau' \mathcal X_\tau)I\right]^{-1}\right), \end{align*}Note that for computational reasons we will work with $\log \sigma$ instead of $\sigma^2$, and hence the associated prior distribution is
$$ \pi(\alpha_0, \log\sigma) \propto 1. $$Writing the parameter vector as $\theta = (\beta, \tau, \alpha_0, \log\sigma)$, the joint posterior probability is:
$$ \pi(\beta, \tau, \alpha_0, \log\sigma\mid Y) \propto \frac{|G_\tau|^{1/2}}{\sigma^{p+n}} \exp\left\{ -\frac{1}{2\sigma^2} \left(\|Y- \alpha_0\boldsymbol{1} - \mathcal X_\tau\beta\|^2 + \frac{1}{g}(\beta - b_0)'G_\tau(\beta - b_0) \right) \right\}. $$Hence, the log-posterior probability is:
$$ \log \pi(\beta, \tau, \alpha_0, \log\sigma\mid Y) \propto \frac{1}{2}\log |G_\tau| - (p+n)\log \sigma -\frac{1}{2\sigma^2} \left(\|Y-\alpha_0\boldsymbol{1} - \mathcal X_\tau\beta\|^2 + \frac{1}{g}(\beta - b_0)'G_\tau(\beta - b_0) \right). $$The metrics considered for model evaluation will be:
We generate a toy dataset with $n=100$ functional regressors $X_i(t) \sim GP(0, K(s, t))$, a response variable given by either a $L^2$ model or a "simple" RKHS function, a value of $\alpha_0=5$ and a variance of $\sigma^2=0.5$. More precisely, we choose one of
$$ Y_i \sim \mathcal N\big(5 -5X_i(0.1) + 10X_i(0.8), \ 0.5\big) $$or
$$ Y_i \sim \mathcal N\left(5 + \int_0^1 \beta(t)X_i(t)\, dt, \ 0.5\right), $$where $\beta(t) \in L^2[0, 1]$.
We consider a regular grid of $N=100$ points on $[0, 1]$. In addition, we center the $X_i$ so that they have zero mean when fed to the sampling algorithms.
We also generate a test dataset with $n_{\text{test}}=50$ regressors for model evaluation.
# -- Data generation parameters
SYNTHETIC_DATA = True  # if False, use the real dataset chosen by REAL_DATA
MODEL_GEN = "RKHS"  # 'L2' or 'RKHS'
REAL_DATA = "Aemet"  # 'Tecator' or 'Aemet' (used only when SYNTHETIC_DATA is False)
INITIAL_SMOOTHING = "NW"  # None, 'NW' or 'Basis'
N_BASIS = 16  # number of B-spline basis functions for basis smoothing
STANDARDIZE_PREDICTORS = False
STANDARDIZE_RESPONSE = False
kernel_fn = utils.fractional_brownian_kernel  # GP covariance kernel K(s, t)
beta_coef = utils.cholaquidis_scenario3  # β(t) used by the L2 generation model
basis = BSpline(n_basis=N_BASIS)
smoothing_params = np.logspace(-4, 4, 50)  # search grid for the smoothing parameter
# -- Dataset generation
# Either simulate GP regressors with an L2/RKHS response, or load a real
# functional dataset; in both cases produce X_fd/X_test_fd and Y/Y_test.
if SYNTHETIC_DATA:
    n_train, n_test = 100, 50
    N = 100  # number of grid points
    grid = np.linspace(1./N, 1., N)  # regular grid on (0, 1]
    # Ground-truth parameters of the RKHS generating model
    beta_true = np.array([-5., 10.])
    tau_true = np.array([0.1, 0.8])
    alpha0_true = 5.
    sigma2_true = 0.5
    if MODEL_GEN == "L2":
        x, y = utils.generate_gp_l2_dataset(
            grid, kernel_fn,
            n_train + n_test, beta_coef, alpha0_true,
            sigma2_true, rng=rng
        )
    elif MODEL_GEN == "RKHS":
        x, y = utils.generate_gp_rkhs_dataset(
            grid, kernel_fn,
            n_train + n_test, beta_true, tau_true,
            alpha0_true, sigma2_true, rng=rng
        )
    else:
        raise ValueError("Invalid model generation strategy.")
    # Train/test split
    X, X_test, Y, Y_test = train_test_split(
        x, y, train_size=n_train, random_state=SEED)
    # Create FData object
    X_fd = skfda.FDataGrid(X, grid)
    X_test_fd = skfda.FDataGrid(X_test, grid)
else:
    if REAL_DATA == "Tecator":
        x, y = skfda.datasets.fetch_tecator(return_X_y=True)
        y = np.sqrt(y[:, 1])  # Sqrt-Fat
    elif REAL_DATA == "Aemet":
        data = skfda.datasets.fetch_aemet()['data']
        data_matrix = data.data_matrix
        temperature = data_matrix[:, :, 0]  # first coordinate is temperature
        x = FDataGrid(temperature, data.grid_points)
        # Log-Sum of log-precipitation for each station
        y = np.log(np.exp(data_matrix[:, :, 1]).sum(axis=1))
    else:
        raise ValueError("REAL_DATA must be 'Tecator' or 'Aemet'.")
    X_fd, X_test_fd, Y, Y_test = train_test_split(
        x, y, train_size=0.8, random_state=SEED)
    N = len(X_fd.grid_points[0])
    grid = np.linspace(1./N, 1., N)  # TODO: use (normalized) real grid
    n_train, n_test = len(X_fd.data_matrix), len(X_test_fd.data_matrix)
# Optionally smooth the raw curves before any further processing, choosing
# the smoothing parameter by generalized CV with the AIC criterion.
if INITIAL_SMOOTHING is not None:
    if INITIAL_SMOOTHING == "NW":
        smoother = NW()  # Nadaraya-Watson kernel smoother
    elif INITIAL_SMOOTHING == "Basis":
        smoother = BasisSmoother(basis)  # projection onto the B-spline basis
    else:
        raise ValueError(
            f"Expected 'NW' or 'Basis' but got {INITIAL_SMOOTHING}.")
    best_smoother = SmoothingParameterSearch(
        smoother,
        smoothing_params,
        scoring=LinearSmootherGeneralizedCVScorer(
            akaike_information_criterion),
        n_jobs=-1,
    )
    with utils.IgnoreWarnings():
        best_smoother.fit(X_fd)
    # Apply the selected smoother to both train and test curves
    X_fd = best_smoother.transform(X_fd)
    X_test_fd = best_smoother.transform(X_test_fd)
    print(f"Smoother: {best_smoother.best_estimator_.__class__.__name__}")
    print(
        f"Smoothing parameter: {best_smoother.best_params_['smoothing_parameter']:.3f}")
# Compute (optional) scale factors; the predictors are always centered below.
if STANDARDIZE_PREDICTORS:
    X_sd = np.sqrt(X_fd.var())
else:
    X_sd = 1.0
if STANDARDIZE_RESPONSE:
    Y_m = Y.mean()
    Y_sd = Y.std()
else:
    Y_m = 0.0
    Y_sd = 1.0
# Standardize data
# Test data is transformed with the statistics of the *training* data.
X_m = X_fd.mean(axis=0)
X_fd = (X_fd - X_m)/X_sd
X = X_fd.data_matrix.reshape(-1, N)  # plain (n, N) matrix view of the curves
X_test_fd = (X_test_fd - X_m)/X_sd
X_test = X_test_fd.data_matrix.reshape(-1, N)
Y = (Y - Y_m)/Y_sd
Y_test = (Y_test - Y_m)/Y_sd
utils.plot_dataset(
    X, Y, n_samples=n_train if not SYNTHETIC_DATA else n_train//2)
In our algorithms, we consider an unconstrained transformed parameter space $\tilde \Theta=\mathbb{R}^{2\hat p+2}$ via the bijections
# -- Model hyperparameters
p_hat = 3  # assumed number of impact points τ
g = 5  # scale of the g-prior on β
eta = 0.1  # ridge-style regularization factor for G_τ
TRANSFORM_TAU = False  # sample logit(τ) instead of τ if True
FIT_SK = True  # whether to fit the sklearn/skfda reference models
# -- Names and labels
# Names of parameters
theta_names = ["β", "τ", "α0", "σ2"]
if TRANSFORM_TAU:
    theta_names_ttr = ["β", "logit τ", "α0", "log σ"]
else:
    theta_names_ttr = ["β", "τ", "α0", "log σ"]
theta_names_aux = ["α0 and log σ"]
# Grouped labels
theta_labels_grouped = [r"$\beta$", r"$\tau$", r"$\alpha_0$", r"$\sigma^2$"]
# Individual labels
theta_labels = []
for i in range(p_hat):
    theta_labels.append(fr"$\beta_{i + 1}$")
for i in range(p_hat):
    theta_labels.append(fr"$\tau_{i + 1}$")
theta_labels.append(theta_labels_grouped[-2])
theta_labels.append(theta_labels_grouped[-1])
# Labels for Arviz
theta_labeller = az.labels.MapLabeller(
    var_name_map=dict(zip(theta_names[-2:], theta_labels_grouped[-2:])),
    coord_map={"projection": dict(
        zip(np.arange(p_hat), np.arange(1, p_hat + 1)))}
)
# Dimension of parameter vector
theta_ndim = len(theta_labels)
# Dimension of grouped parameter vector
theta_ndim_grouped = len(theta_names)
# Names of results columns
results_columns = ["Estimator", "Features", "MSE", "RMSE", r"$R^2$"]
# -- Parameter space and miscellaneous
# Bijection applied to τ when moving to the unconstrained space
if TRANSFORM_TAU:
    tau_ttr = utils.Logit()
else:
    tau_ttr = utils.Identity()
# Parameter space
theta_space = utils.ThetaSpace(
    p_hat, grid, theta_names, theta_names_ttr, theta_labels, tau_ttr=tau_ttr)
# Statistics for posterior predictive checks
statistics = [
    ("min", np.min),
    ("max", np.max),
    ("median", np.median),
    ("mean", np.mean),
    ("std", np.std)]
# Point estimates for posterior distribution
point_estimates = ["mode", "mean", "median"]
# -- Custom CV and transformers
def cv_sk(regressors, folds, X, Y, X_test, Y_test, verbose=False):
    """Cross-validate a family of pipelines and evaluate them on test data.

    Parameters
    ----------
    regressors : list of (name, pipeline, param_grid) tuples.
    folds : CV splitter passed to GridSearchCV.
    X, Y : training data (format depends on the pipelines used).
    X_test, Y_test : held-out data for the final metrics.
    verbose : print a progress message per regressor if True.

    Returns
    -------
    (df_metrics, reg_cv) : DataFrame of metrics sorted by RMSE, and the
    fitted GridSearchCV of the *last* regressor in the list (None when
    'regressors' is empty).
    """
    df_metrics_sk = pd.DataFrame(columns=results_columns)
    reg_cv = None  # avoid NameError when 'regressors' is empty
    for i, (name, pipe, params) in enumerate(regressors):
        if verbose:
            print(f"Fitting {name}...")
        reg_cv = GridSearchCV(pipe, params, scoring="neg_mean_squared_error",
                              n_jobs=N_CORES, cv=folds)
        with utils.IgnoreWarnings():
            reg_cv.fit(X, Y)
        Y_hat_sk = reg_cv.predict(X_test)
        metrics_sk = utils.regression_metrics(Y_test, Y_hat_sk)
        # Report a notion of "number of features" appropriate to each model
        if name == "sk_fknn":
            n_features = f"K={reg_cv.best_params_['reg__n_neighbors']}"
        elif "svm" in name:
            n_features = reg_cv.best_estimator_["reg"].n_features_in_
        elif "pls1" in name:
            n_features = reg_cv.best_estimator_["reg"].n_components
        else:
            # Linear models: count the non-zero coefficients
            if isinstance(reg_cv.best_estimator_["reg"].coef_[0], FDataBasis):
                coef = reg_cv.best_estimator_["reg"].coef_[0].coefficients[0]
            else:
                coef = reg_cv.best_estimator_["reg"].coef_
            n_features = sum(~np.isclose(coef, 0))
        df_metrics_sk.loc[i] = [
            name,
            n_features,
            metrics_sk["mse"],
            metrics_sk["rmse"],
            metrics_sk["r2"]]
    df_metrics_sk.sort_values(results_columns[-2], inplace=True)
    return df_metrics_sk, reg_cv
def bayesian_var_sel(idata, theta_space, names,
                     X, Y, X_test, Y_test, folds,
                     prefix, point_est='mode',
                     verbose=False):
    """Variable selection driven by the posterior point estimate of τ.

    The estimated impact points are mapped to their nearest grid indices and
    used to restrict the curves before fitting Lasso/Ridge/SVM pipelines via
    `cv_sk`. Returns the resulting metrics DataFrame.
    """
    grid = theta_space.grid
    p_hat = theta_space.p
    # Posterior point estimate of τ, mapped to the nearest grid indices
    tau_hat = utils.point_estimate(
        idata, point_est, names)[p_hat:2*p_hat]
    idx_hat = np.abs(grid - tau_hat[:, np.newaxis]).argmin(1)
    alphas = np.logspace(-4, 4, 20)
    params_reg = {"reg__alpha": alphas}
    params_svm = {"reg__C": alphas,
                  "reg__gamma": ['auto', 'scale']}
    # Downstream estimators paired with their CV grids (Emcee+Lasso,
    # Emcee+Ridge, Emcee+SVM RBF)
    candidates = [
        ("sk_lasso", Lasso(), params_reg),
        ("sk_ridge", Ridge(), params_reg),
        ("sk_svm_rbf", SVR(kernel='rbf'), params_svm),
    ]
    regressors_var_sel = []
    for suffix, estimator, params in candidates:
        pipe = Pipeline([
            ("var_sel", VariableSelection(grid, idx_hat)),
            ("data_matrix", DataMatrix()),
            ("reg", estimator)])
        regressors_var_sel.append(
            (f"{prefix}_{point_est}+{suffix}", pipe, params))
    df_metrics_var_sel, _ = cv_sk(regressors_var_sel, folds,
                                  X, Y, X_test, Y_test, verbose)
    return df_metrics_var_sel
class FeatureSelector(BaseEstimator, TransformerMixin):
    """Keep `p` evenly spaced columns of a data matrix."""

    def __init__(self, p=1):
        self.p = p

    def fit(self, X, y=None):
        """Choose `p` equispaced column indices spanning [0, n_cols - 1]."""
        n_cols = X.shape[1]
        self.idx_ = np.linspace(0, n_cols - 1, self.p).astype(int)
        return self

    def transform(self, X, y=None):
        """Return only the selected columns of X."""
        return X[:, self.idx_]
class DataMatrix(BaseEstimator, TransformerMixin):
    """Flatten an FDataGrid into a plain 2D (n_samples, n_points) matrix."""

    def fit(self, X, y=None):
        # Remember the grid length so transform reshapes consistently
        grid_points = X.grid_points[0]
        self.N = len(grid_points)
        return self

    def transform(self, X, y=None):
        """Return the underlying data matrix with one row per sample."""
        return X.data_matrix.reshape(-1, self.N)
class Basis(BaseEstimator, TransformerMixin):
    """Project functional data onto a fixed basis (Fourier by default)."""

    def __init__(self, basis=Fourier()):
        self.basis = basis

    def fit(self, X, y=None):
        # Stateless: the basis is fixed at construction time
        return self

    def transform(self, X, y=None):
        """Return the basis expansion of X as an FDataBasis object."""
        return X.to_basis(self.basis)
class VariableSelection(BaseEstimator, TransformerMixin):
    """Restrict an FDataGrid to the grid points indexed by `idx`.

    Parameters
    ----------
    grid : array of grid points of the full curves.
    idx : indices of the grid points to keep (stored sorted).
    """

    def __init__(self, grid=None, idx=None):
        self.grid = grid
        # Sort a *copy* of the indices: the original code called
        # `self.idx.sort()`, which mutated the caller's array in place and
        # raised AttributeError for the default idx=None.
        self.idx = idx if idx is None else np.sort(idx)

    def fit(self, X, y=None):
        return self

    def transform(self, X, y=None):
        """Return a new FDataGrid with only the selected grid points."""
        return FDataGrid(X.data_matrix[:, self.idx], self.grid[self.idx])
class PLSRegressionWrapper(PLSRegression):
    """PLSRegression adapted for use both as transformer and regressor in pipelines."""

    def transform(self, X, y=None):
        # Accept (and ignore) `y` so the signature matches the transformer API.
        check_is_fitted(self)
        return super().transform(X)

    def fit_transform(self, X, y):
        return self.fit(X, y).transform(X)

    def predict(self, X, copy=True):
        check_is_fitted(self)
        # NOTE(review): assumes coef_ has shape (n_features, n_targets) as in
        # older sklearn; newer versions transpose it — verify against the
        # installed sklearn version.
        if self.coef_.shape[1] == 1:  # if n_targets == 1
            # Squeeze the (n, 1) prediction to a flat vector
            return super().predict(X, copy)[:, 0]
        else:
            return super().predict(X, copy)
# -- Select family of regressors
regressors = []
# Hyperparameter grids shared by the CV searches below
alphas = np.logspace(-4, 4, 20)
n_selected = [5, 10, 15, 20, 25, X.shape[1]]
n_components = [2, 3, 4, 5, 10]
n_basis_bsplines = [8, 10, 12, 14, 16]
n_basis_fourier = [3, 5, 7, 9, 11]
n_neighbors = [3, 5, 7]
basis_bspline = [BSpline(n_basis=p) for p in n_basis_bsplines]
basis_fourier = [Fourier(n_basis=p) for p in n_basis_fourier]
# FPLS bases can fail to build for some sizes; skip those
basis_fpls = []
for p in n_components:
    try:
        basis_fpls.append(FPLSBasis(X_fd, Y, n_basis=p))
    except ValueError:
        print(f"Can't create FPLSBasis with n_basis={p}")
        continue
# Parameter grids keyed by pipeline step names ("step__param")
params_reg = {"reg__alpha": alphas}
params_svm = {"reg__C": alphas,
              "reg__gamma": ['auto', 'scale']}
params_select = {"selector__p": n_selected}
params_pls = {"reg__n_components": n_components}
params_dim_red = {"dim_red__n_components": n_components}
params_basis = {"basis__basis": basis_bspline + basis_fourier}
params_basis_fpca = {"basis__n_basis": n_components}
params_basis_fpls = {"basis__basis": basis_fpls}
params_knn = {"reg__n_neighbors": n_neighbors,
              "reg__weights": ['uniform', 'distance']}
params_mrmr = {"var_sel__method": ["MID", "MIQ"],
               "var_sel__n_features_to_select": n_components}
"""
MULTIVARIATE MODELS
"""
# Lasso
regressors.append(("sk_lasso",
Pipeline([
("data_matrix", DataMatrix()),
("reg", Lasso())]),
params_reg
))
# PLS1 regression
regressors.append(("sk_pls1",
Pipeline([
("data_matrix", DataMatrix()),
("reg", PLSRegressionWrapper())]),
params_pls
))
"""
VARIABLE SELECTION + MULTIVARIATE MODELS
"""
# Manual+Ridge
regressors.append(("manual_sel+sk_ridge",
Pipeline([
("data_matrix", DataMatrix()),
("selector", FeatureSelector()),
("reg", Ridge())]),
{**params_reg, **params_select}
))
# FPCA+Ridge
regressors.append(("fpca+sk_ridge",
Pipeline([
("dim_red", FPCA()), # Retains scores only
("reg", Ridge())]),
{**params_dim_red, **params_reg}
))
"""
TARDA DEMASIADO (búsqueda en CV demasiado grande?)
# FPLS (fixed basis)+Ridge
regressors.append(("fpls_basis+sk_ridge",
Pipeline([
("basis", Basis()),
("dim_red", FPLS()),
("reg", Ridge())]),
{**params_basis, **params_dim_red, **params_reg}
))
"""
# PCA+Ridge
regressors.append(("pca+sk_ridge",
Pipeline([
("data_matrix", DataMatrix()),
("dim_red", PCA(random_state=SEED)),
("reg", Ridge())]),
{**params_dim_red, **params_reg}
))
# PLS+Ridge
regressors.append(("pls+sk_ridge",
Pipeline([
("data_matrix", DataMatrix()),
("dim_red", PLSRegressionWrapper()),
("reg", Ridge())]),
{**params_dim_red, **params_reg}
))
# RMH+Ridge
regressors.append(("rmh+sk_ridge",
Pipeline([
("var_sel", RMH()),
("reg", Ridge())]),
params_reg
))
"""
TARDA DEMASIADO (búsqueda en CV demasiado grande?)
# mRMR+Ridge
regressors.append(("mRMR+sk_ridge",
Pipeline([
("var_sel", mRMR()),
("reg", Ridge())]),
{**params_mrmr, **params_reg}
))
"""
# Manual+SVM RBF
regressors.append(("manual_sel+sk_svm_rbf",
Pipeline([
("data_matrix", DataMatrix()),
("selector", FeatureSelector()),
("reg", SVR(kernel='rbf'))]),
{**params_select, **params_svm}
))
# FPCA+SVM RBF
regressors.append(("fpca+sk_svm_rbf",
Pipeline([
("dim_red", FPCA()), # Retains scores only
("reg", SVR(kernel='rbf'))]),
{**params_dim_red, **params_svm}
))
"""
TARDA DEMASIADO (búsqueda en CV demasiado grande?)
# FPLS (fixed basis)+SMV RBF
regressors.append(("fpls_basis+sk_svm_rbf",
Pipeline([
("basis", Basis()),
("dim_red", FPLS()),
("reg", SVR(kernel='rbf'))]),
{**params_basis, **params_dim_red, **params_svm}
))
"""
# PCA+SVM RBF
regressors.append(("pca+sk_svm_rbf",
Pipeline([
("data_matrix", DataMatrix()),
("dim_red", PCA(random_state=SEED)),
("reg", SVR(kernel='rbf'))]),
{**params_dim_red, **params_svm}
))
# PLS+SMV RBF
regressors.append(("pls+sk_svm_rbf",
Pipeline([
("data_matrix", DataMatrix()),
("dim_red", PLSRegressionWrapper()),
("reg", SVR(kernel='rbf'))]),
{**params_dim_red, **params_svm}
))
# RMH+SVM RBF
regressors.append(("rmh+sk_svm_rbf",
Pipeline([
("var_sel", RMH()),
("reg", SVR(kernel='rbf'))]),
params_svm
))
"""
TARDA DEMASIADO (búsqueda en CV demasiado grande?)
# mRMR+SVM RBF
regressors.append(("mRMR+sk_svm_rbf",
Pipeline([
("var_sel", mRMR()),
("reg", SVR(kernel='rbf'))]),
{**params_mrmr, **params_svm}
))
"""
"""
FUNCTIONAL MODELS
"""
regressors.append(("sk_apls",
Pipeline([
("reg", APLS())]),
params_pls
))
# NOTE: while not strictly necessary, the test data undergoes the
# same basis expansion process as the training data. This is more
# computationally efficient and seems to improve the performance.
# Fixed basis + Functional Linear Regression
regressors.append(("sk_flin_basis",
Pipeline([
("basis", Basis()),
("reg", FLinearRegression())]),
params_basis
))
"""
TARDA BASTANTE (cálculo de Gram matrix costoso en la base)
# FPCA basis + Functional Linear Regression
regressors.append(("sk_flin_khl",
Pipeline([
("basis", FPCABasis()),
("reg", FLinearRegression())]),
params_basis_fpca
))
"""
"""
TARDA BASTANTE (cálculo de Gram matrix costoso en la base)
# FPLS basis + Functional Linear Regression
regressors.append(("sk_flin_fpls",
Pipeline([
("basis", Basis()),
("reg", FLinearRegression())]),
params_basis_fpls
))
"""
# Fixed basis + FPLS1 regression
regressors.append(("sk_fpls1_basis",
Pipeline([
("basis", Basis()),
("reg", FPLS())]),
{**params_basis, **params_pls}
))
# KNeighbors Functional Regression
regressors.append(("sk_fknn",
Pipeline([
("reg", KNeighborsRegressor())]),
params_knn
))
# -- Fit models and show metrics
folds = KFold(shuffle=True, random_state=SEED)
if FIT_SK:
    # Cross-validate every reference pipeline and evaluate on the test set
    df_metrics_sk, reg_cv = cv_sk(regressors, folds, X_fd, Y,
                                  X_test_fd, Y_test, verbose=True)
    display(df_metrics_sk.style.hide_index())
# -- Negative log-likelihood definition in transformed parameter space
def neg_ll(theta_tr, X, Y, theta_space):
    """Negative log-likelihood in the transformed parameter space.

    The transformed parameter vector 'theta_tr' is (β, τ, α0, log σ).
    """
    assert len(theta_tr) == theta_space.ndim
    n_samples = X.shape[0]
    # Back-transform and unpack the parameter vector
    theta = theta_space.backward(theta_tr)
    beta, tau, alpha0, sigma2 = theta_space.get_params(theta)
    log_sigma = theta_space.get_sigma2(theta_tr)
    # Evaluate the curves at the grid points closest to τ
    nearest = np.abs(theta_space.grid - tau[:, np.newaxis]).argmin(1)
    residual = Y - alpha0 - X[:, nearest]@beta
    ll = -n_samples*log_sigma - np.linalg.norm(residual)**2/(2*sigma2)
    return -ll
# -- MLE estimation functions
def optimizer_global(random_state, args):
    """Run one basin-hopping minimization of `neg_ll` and return the optimum.

    'args' bundles (theta_init, X, Y, theta_space, method, bounds) so the
    function can be dispatched through multiprocessing.
    """
    theta_init, X, Y, theta_space, method, bounds = args
    local_minimizer = {"args": (X, Y, theta_space),
                       "method": method,
                       "bounds": bounds}
    result = scipy.optimize.basinhopping(
        neg_ll,
        x0=theta_init,
        seed=random_state,
        minimizer_kwargs=local_minimizer,
    )
    return result.x
def compute_mle(
        theta_space, X, Y,
        method='Powell',
        strategy='global',
        rng=None):
    """Compute the MLE of the transformed parameter vector and its BIC.

    Parameters
    ----------
    theta_space : parameter space helper (provides forward/backward maps).
    X, Y : training data.
    method : local optimizer ('Nelder-Mead', 'Powell' or 'L-BFGS-B').
    strategy : 'local' for a single minimize() run, 'global' for N_CORES
        independent basin-hopping runs (best BIC wins).
    rng : numpy Generator (a fresh one is created if None).

    Returns
    -------
    (mle_theta, bic) : the MLE in the transformed space and its BIC value.
    """
    if rng is None:
        rng = np.random.default_rng()
    p = theta_space.p
    # Neutral starting point: β=0, τ=0.5, α0=mean(Y), σ=1
    theta_init = theta_space.forward(
        np.array([0.0]*p + [0.5]*p + [Y.mean()] + [1.0]))
    if TRANSFORM_TAU:
        bounds = None  # logit(τ) is unconstrained
    else:
        bounds = ([(None, None)]*p
                  + [(theta_space.tau_lb, theta_space.tau_ub)]*p
                  + [(None, None)]
                  + [(None, None)])
    with utils.IgnoreWarnings():
        if strategy == 'local':
            mle_theta = scipy.optimize.minimize(
                neg_ll,
                x0=theta_init,
                bounds=bounds,
                method=method,
                args=(X, Y, theta_space)
            ).x
            bic = utils.compute_bic(theta_space, neg_ll, mle_theta, X, Y)
        elif strategy == 'global':
            with Pool(N_CORES) as pool:
                print(f"-- Computing MLE with {N_CORES} independent runs --")
                random_states = [rng.integers(2**32) for _ in range(N_CORES)]
                args_optim = [theta_init, X, Y, theta_space, method, bounds]
                mles = pool.starmap(
                    optimizer_global, product(random_states, [args_optim]))
            # BUGFIX: was `bics = utils.bic = utils.compute_bic(...)`, which
            # accidentally stored the result as an attribute on the utils module.
            bics = utils.compute_bic(theta_space, neg_ll, mles, X, Y)
            best = np.argmin(bics)
            mle_theta = mles[best]
            bic = bics[best]
        else:
            raise ValueError(
                "Parameter 'strategy' must be one of {'local', 'global'}.")
    return mle_theta, bic
# -- MLE computation
method_mle = 'L-BFGS-B'  # 'Nelder-Mead', 'Powell' or 'L-BFGS-B'
strategy_mle = 'global'
mle_theta_tr, bic = compute_mle(
    theta_space, X, Y, method_mle,
    strategy_mle, rng)
# Back to the original parameterization for reporting and prediction
mle_theta = theta_space.backward(mle_theta_tr)
Y_hat_mle = utils.generate_response(X_test, mle_theta, noise=False)
df_metrics_mle = pd.DataFrame(columns=results_columns)
metrics_mle = utils.regression_metrics(Y_test, Y_hat_mle)
df_metrics_mle.loc[0] = [
    "mle",
    p_hat,
    metrics_mle["mse"],
    metrics_mle["rmse"],
    metrics_mle["r2"]
]
print(f"\nBIC: {bic:.3f}")
print("MLE:")
display(pd.DataFrame(zip(theta_space.labels, mle_theta),
                     columns=["", "MLE"]).style.hide_index())
print("Regression metrics:")
df_metrics_mle.style.hide_index()
import emcee
We only need to provide the sampler with the logarithm of the posterior distribution. For clarity we split its computation into a log-prior and a log-likelihood term, although a more efficient implementation would compute everything in a single function.
# -- Log-posterior model
def log_prior(theta_tr):
    """Log-prior of the transformed parameter vector.

    Global parameters (for efficient parallelization):
    X, b0, g, eta, theta_space
    """
    assert len(theta_tr) == theta_space.ndim
    p = theta_space.p
    # Back-transform and unpack
    theta = theta_space.backward(theta_tr)
    beta, tau, alpha0, sigma2 = theta_space.get_params(theta)
    log_sigma = theta_space.get_sigma2(theta_tr)
    # Reject τ outside its bounds when sampling in the original space
    if not TRANSFORM_TAU:
        below = (tau < theta_space.tau_lb).any()
        above = (tau > theta_space.tau_ub).any()
        if below or above:
            return -np.inf
    # Compute and regularize G_τ from the curves evaluated at τ
    nearest = np.abs(theta_space.grid - tau[:, np.newaxis]).argmin(1)
    X_tau = X[:, nearest]
    G_tau = X_tau.T@X_tau
    G_tau = (G_tau + G_tau.T)/2.  # Enforce symmetry
    G_tau_reg = G_tau + eta * \
        np.max(np.linalg.eigvalsh(G_tau))*np.identity(p)
    # Log-prior of the g-prior on β (centered at b0), plus the flat terms
    b = beta - b0
    return (0.5*utils.logdet(G_tau_reg)
            - p*log_sigma
            - b.T@G_tau_reg@b/(2*g*sigma2))
def log_likelihood(theta_tr, Y):
    """Log-likelihood (and optionally its pointwise version).

    Global parameters (for efficient parallelization):
    X, theta_space, return_ll
    """
    assert len(theta_tr) == theta_space.ndim
    n_samples = X.shape[0]
    theta = theta_space.backward(theta_tr)
    beta, tau, alpha0, sigma2 = theta_space.get_params(theta)
    log_sigma = theta_space.get_sigma2(theta_tr)
    # Evaluate the curves at the grid points closest to τ
    nearest = np.abs(theta_space.grid - tau[:, np.newaxis]).argmin(1)
    residual = Y - alpha0 - X[:, nearest]@beta
    ll = -n_samples*log_sigma - np.linalg.norm(residual)**2/(2*sigma2)
    if not return_ll:
        return ll
    # Add constant term so that it is the genuine log-probability
    ll_pointwise = (-log_sigma - 0.5*np.log(2*np.pi)
                    - residual**2/(2*sigma2))
    return ll, ll_pointwise
def log_posterior(theta_tr, Y):
    """Log-posterior, with optional posterior-predictive and pointwise-ll extras.

    Global parameters (for efficient parallelization):
    X, rng, return_pp, return_ll, theta_space
    """
    # Prior first: short-circuit on an impossible parameter value
    lp = log_prior(theta_tr)
    if not np.isfinite(lp):
        # Emit -inf placeholders shaped like the requested extra outputs
        if return_pp and return_ll:
            return -np.inf, np.full_like(Y, -np.inf), np.full_like(Y, -np.inf)
        if return_pp or return_ll:
            return -np.inf, np.full_like(Y, -np.inf)
        return -np.inf
    # Likelihood (and possibly its pointwise version)
    if return_ll:
        ll, ll_pointwise = log_likelihood(theta_tr, Y)
    else:
        ll = log_likelihood(theta_tr, Y)
    lpos = lp + ll
    # Posterior predictive samples, if requested
    if return_pp:
        theta = theta_space.backward(theta_tr)
        pp = utils.generate_response(X, theta, rng=rng)
        if return_ll:
            return lpos, pp, ll_pointwise
        return lpos, pp
    if return_ll:
        return lpos, ll_pointwise
    return lpos
We set up the initial points of the chains to be in a random neighbourhood around the MLE to increase the speed of convergence.
def run_emcee(n_walkers, n_iter_initial, n_iter, moves,
              thin, thin_pp, return_pp, return_ll):
    """Run the full emcee workflow: sample, summarize and evaluate.

    Uses several module-level globals: Y, p0, theta_ndim, theta_space,
    theta_names, theta_labeller, point_estimates, results_columns, X_fd,
    X_test, X_test_fd, Y_test, folds, FIT_SK, df_metrics_sk, df_metrics_mle.

    Returns (sampler, idata_emcee, df_metrics_emcee).
    """
    # -- Run sampler
    with Pool(N_CORES) as pool:
        print(
            f"-- Running affine-invariant ensemble sampler with {N_CORES} cores --")
        sampler = emcee.EnsembleSampler(
            n_walkers, theta_ndim, log_posterior,
            pool=pool, args=(Y,),
            moves=moves)
        # Short initial run whose samples are discarded (store=False)
        print("Tuning phase...")
        state = sampler.run_mcmc(
            p0, n_iter_initial, progress='notebook',
            store=False)
        sampler.reset()
        print("MCMC sampling...")
        sampler.run_mcmc(state, n_iter, progress='notebook')
        print(
            f"Mean acceptance fraction: {100*np.mean(sampler.acceptance_fraction):.3f}%")
    logging.disable(sys.maxsize)  # Disable logger
    # Analyze autocorrelation and set burn-in and thinning values
    autocorr = sampler.get_autocorr_time(quiet=True)
    max_autocorr = np.max(autocorr)
    if (np.isfinite(autocorr)).all():
        # Discard a few autocorrelation times worth of samples as burn-in
        burn = int(3*max_autocorr)
    else:
        print("Some autocorrelation value is not finite")
        burn = 500
    logging.disable(logging.NOTSET)  # Re-enable logger
    # Get InferenceData object
    idata_emcee = utils.emcee_to_idata(
        sampler, theta_space, burn, thin,
        ["y_star"] if return_pp else [], return_ll)
    print("\n-- Summary statistics --")
    display(utils.summary(idata_emcee, var_names=theta_names,
                          kind="stats", labeller=theta_labeller))
    # -- Compute metrics using several point estimates
    df_metrics_emcee = pd.DataFrame(columns=results_columns)
    # Posterior mean estimate
    pp_test = utils.generate_pp(
        idata_emcee, X_test, theta_names, thin_pp, rng=rng)
    print("Computing metrics...", end="\r")
    Y_hat_pp = pp_test.mean(axis=(0, 1))
    metrics_pp = utils.regression_metrics(Y_test, Y_hat_pp)
    df_metrics_emcee.loc[0] = [
        "emcee_posterior_mean",
        p_hat,
        metrics_pp["mse"],
        metrics_pp["rmse"],
        metrics_pp["r2"]
    ]
    # Point estimates
    for i, pe in enumerate(point_estimates):
        Y_hat_pe = utils.point_predict(
            X_test, idata_emcee,
            theta_names, pe)
        metrics_pe = utils.regression_metrics(Y_test, Y_hat_pe)
        df_metrics_emcee.loc[i + 1] = [
            "emcee_" + pe,
            p_hat,
            metrics_pe["mse"],
            metrics_pe["rmse"],
            metrics_pe["r2"]
        ]
    # Bayesian variable selection
    for pe in point_estimates:
        df_metrics_var_sel = bayesian_var_sel(
            idata_emcee, theta_space, theta_names, X_fd,
            Y, X_test_fd, Y_test, folds, prefix="emcee",
            point_est=pe)
        df_metrics_emcee = df_metrics_emcee.append(df_metrics_var_sel)
    # Merge with the reference model metrics for a single comparison table
    if FIT_SK:
        df_metrics_emcee = df_metrics_emcee.append(df_metrics_sk)
    df_metrics_emcee = df_metrics_emcee.append(df_metrics_mle)
    df_metrics_emcee.sort_values(results_columns[-2], inplace=True)
    print("-- Regression metrics --")
    display(df_metrics_emcee.style.hide_index())
    return sampler, idata_emcee, df_metrics_emcee
# -- Sampler parameters
n_walkers = 64
n_iter_initial = 100  # tuning iterations (discarded)
n_iter = 1000  # main sampling iterations
return_pp = True  # also record posterior predictive samples
return_ll = True  # also record pointwise log-likelihood
# Initial-point generation: fraction of fully random walkers and the
# dispersion of the walkers placed around the MLE
frac_random = 0.3
sd_beta_init = 1.0
sd_tau_init = 0.2
mean_alpha0_init = Y.mean()
sd_alpha0_init = 1.0
param_sigma2_init = 2.0  # shape parameter in inv_gamma distribution
sd_sigma2_init = 1.0
# Mixture of emcee proposal moves with their selection weights
moves = [
    (emcee.moves.StretchMove(), 0.7),
    (emcee.moves.WalkMove(), 0.3)
]
thin = 1
thin_pp = 5  # extra thinning for posterior predictive draws
FAST_RUN = True  # use the all-in-one run_emcee() helper
# -- Run sampler
# Start every walker in a (random) neighbourhood around the MLE
p0 = utils.weighted_initial_guess_around_value(
    theta_space, mle_theta_tr, sd_beta_init, sd_tau_init,
    mean_alpha0_init, sd_alpha0_init, param_sigma2_init,
    sd_sigma2_init, n_walkers=n_walkers, rng=rng,
    frac_random=frac_random)
# Center of the g-prior on β: the β part of the MLE (read by log_prior)
b0 = mle_theta_tr[theta_space.beta_idx]
if FAST_RUN:
    sampler, idata_emcee, df_metrics_emcee_full = run_emcee(
        n_walkers, n_iter_initial, n_iter, moves,
        thin, thin_pp, return_pp, return_ll)
else:
    # Manual run: same sampling steps as run_emcee, without the reporting
    with Pool(N_CORES) as pool:
        print(
            f"-- Running affine-invariant ensemble sampler with {N_CORES} cores --")
        sampler = emcee.EnsembleSampler(
            n_walkers, theta_ndim, log_posterior,
            pool=pool, args=(Y,),
            moves=moves)
        print("Tuning phase...")
        state = sampler.run_mcmc(
            p0, n_iter_initial, progress='notebook',
            store=False)
        sampler.reset()
        print("MCMC sampling...")
        sampler.run_mcmc(state, n_iter, progress='notebook')
        print(
            f"Mean acceptance fraction: {100*np.mean(sampler.acceptance_fraction):.3f}%")
We analyze the samples of all chains, discarding as burn-in a number of initial samples equal to a few times the integrated autocorrelation time. We could also perform thinning and keep only every $k$-th value.
# -- Sampler statistics and trace (with burn-in and thinning)
logging.disable(sys.maxsize)  # Disable logger
# Analyze autocorrelation and set burn-in and thinning values
autocorr = sampler.get_autocorr_time(quiet=True)
max_autocorr = np.max(autocorr)
if (np.isfinite(autocorr)).all():
    # Discard a few autocorrelation times worth of samples as burn-in
    burn = int(3*max_autocorr)
else:
    print("Some autocorrelation value is not finite")
    burn = 500
# Get trace of samples
trace_flat = utils.get_trace_emcee(sampler, theta_space, burn, thin, flat=True)
# Get InferenceData object
idata_emcee = utils.emcee_to_idata(
    sampler, theta_space, burn, thin,
    ["y_star"] if return_pp else [], return_ll)
# Update and show autocorrelation
autocorr_thin = sampler.get_autocorr_time(discard=burn, thin=thin, quiet=True)
logging.disable(logging.NOTSET)  # Re-enable logger
# Table: autocorrelation time and effective sample size per parameter
pd.DataFrame(
    zip(theta_labels, autocorr_thin, len(trace_flat)/autocorr_thin),
    columns=["", "Autocorrelation times", "Effective i.i.d samples"]
).style.hide_index()
# Posterior summary table and diagnostic plots
utils.summary(idata_emcee, var_names=theta_names,
              kind="stats", labeller=theta_labeller)
az.plot_trace(idata_emcee, labeller=theta_labeller,
              combined=True, var_names=theta_names)
print("Combined density and trace plot:")
az.plot_posterior(idata_emcee, labeller=theta_labeller, point_estimate='mode',
                  grid=(NROWS(theta_ndim), NCOLS), textsize=20,
                  var_names=theta_names)
print("Marginal posterior distributions:")
We can perform a couple of visual posterior predictive checks. In particular:
We also show the Bayesian p-value for several statistics, which is defined as $P(T(y^*)\leq T(y)\mid y)$, and is computed by simply measuring the proportion of generated samples $\{T(Y^*_m)\}_m$ that fall below the real value of the statistic. It is expected to be around $0.5$ when the model accurately represents the data.
# -- Generate and plot posterior predictive checks from X
# Reuse the stored posterior predictive samples if they already exist
if "posterior_predictive" not in idata_emcee:
    pp = utils.generate_pp(idata_emcee, X, theta_names, rng=rng)
    utils.pp_to_idata([pp], idata_emcee, ['y_star'], merge=True)
else:
    pp = idata_emcee.posterior_predictive['y_star'].to_numpy()
# Posterior predictive checks
fig, axs = plt.subplots(1, 2, figsize=(10, 4))
plt.suptitle(r"Posterior predictive checks on $X$")
utils.plot_ppc(idata_emcee, n_samples=500, ax=axs[0],
               data_pairs={'y_obs': 'y_star'})
az.plot_bpv(idata_emcee, kind='t_stat', t_stat='mean',
            ax=axs[1], plot_mean=False,
            data_pairs={'y_obs': 'y_star'}, bpv=False)
# Mark the observed sample mean against the distribution of simulated means
axs[1].axvline(Y.mean(), ls="--",
               color="r", lw=2, label=r"$\bar Y$")
handles, labels = axs[1].get_legend_handles_labels()
handles.extend([Line2D([0], [0], label=r"Distribution of $\bar Y^*$")])
axs[1].legend(handles=handles)
# Show Bayesian p-values
for name, stat in statistics:
    bpv = utils.bpv(pp, Y, stat)
    print(f"bpv [T={name}]: {bpv:.3f}")
az.plot_autocorr(idata_emcee, combined=True, var_names=theta_names,
                 grid=(NROWS(theta_ndim), NCOLS), labeller=theta_labeller)
print("Combined autocorrelation times:")
# -- Generate and plot posterior predictive checks from X_test
# Posterior predictive checks
pp_test = utils.generate_pp(idata_emcee, X_test, theta_names, rng=rng)
idata_pp_test = utils.pp_to_idata(
    [pp_test], idata_emcee, ['y_star'], y_obs=Y_test)
fig, axs = plt.subplots(1, 2, figsize=(10, 4))
plt.suptitle(r"Posterior predictive checks on $X_{test}$")
utils.plot_ppc(idata_pp_test, n_samples=500, data_pairs={
    'y_obs': 'y_star'}, ax=axs[0])
az.plot_bpv(idata_pp_test, kind='t_stat', t_stat='mean', data_pairs={
    'y_obs': 'y_star'}, plot_mean=False, ax=axs[1], bpv=False)
# Mark the observed test mean against the distribution of simulated means
axs[1].axvline(Y_test.mean(), ls="--",
               color="r", lw=2, label=r"$\bar Y_{test}$")
handles, labels = axs[1].get_legend_handles_labels()
handles.extend([Line2D([0], [0], label=r"Distribution of $\bar Y^*_{test}$")])
_ = axs[1].legend(handles=handles)
# Show Bayesian p-values
for name, stat in statistics:
    bpv = utils.bpv(pp_test, Y_test, stat)
    print(f"bpv [T={name}]: {bpv:.3f}")
# -- Compute metrics using several point estimates
df_metrics_emcee = pd.DataFrame(columns=results_columns)
# Posterior mean estimate
# Thin the predictive draws and average over chains and draws
Y_hat_pp = pp_test[:, ::thin_pp, :].mean(axis=(0, 1))
metrics_pp = utils.regression_metrics(Y_test, Y_hat_pp)
df_metrics_emcee.loc[0] = [
    "emcee_posterior_mean",
    p_hat,
    metrics_pp["mse"],
    metrics_pp["rmse"],
    metrics_pp["r2"]
]
# Point estimates
for i, pe in enumerate(point_estimates):
    Y_hat_pe = utils.point_predict(
        X_test, idata_emcee,
        theta_names, pe)
    metrics_pe = utils.regression_metrics(Y_test, Y_hat_pe)
    df_metrics_emcee.loc[i + 1] = [
        "emcee_" + pe,
        p_hat,
        metrics_pe["mse"],
        metrics_pe["rmse"],
        metrics_pe["r2"]
    ]
df_metrics_emcee.sort_values(results_columns[-2], inplace=True)
df_metrics_emcee.style.hide_index()
# -- Test variable selection procedure
# Run the Bayesian variable selection once per posterior point estimate
df_metrics_emcee_var_sel = pd.DataFrame(columns=results_columns)
for pe in point_estimates:
    df_var_sel = bayesian_var_sel(
        idata_emcee, theta_space, theta_names, X_fd,
        Y, X_test_fd, Y_test, folds, prefix="emcee",
        point_est=pe)
    df_metrics_emcee_var_sel = df_metrics_emcee_var_sel.append(df_var_sel)
df_metrics_emcee_var_sel.sort_values(results_columns[-2], inplace=True)
df_metrics_emcee_var_sel.style.hide_index()
This is only for testing purposes; in a production environment one should use the Backends feature of emcee.
# -- Save
# NOTE(review): pickle is only safe for files you created yourself; never
# load pickled data from an untrusted source.
with open("emcee-p-fixed.idata", 'wb') as file:
    pickle.dump(idata_emcee, file)
# -- Load
with open("emcee-p-fixed.idata", 'rb') as file:
    idata_emcee = pickle.load(file)
# Rebuild the raw trace from the stored InferenceData
trace = idata_emcee.posterior.to_array().to_numpy().T
trace_flat = trace.reshape(-1, trace.shape[-1])  # All chains combined
import pymc3 as pm
import theano
import theano.tensor as tt
# -- Probabilistic model
def make_model(theta_space, g, eta, X, Y, names, names_aux, mle_theta=None):
    """Build the PyMC3 model Y_i ~ N(alpha0 + X_i(tau) . beta, sigma^2).

    The discretized functional covariates X are observed on
    `theta_space.grid`; `tau` (p impact points in [0, 1]) is mapped to the
    nearest grid columns, and `beta` receives a Zellner-style g-prior
    centered at `b0` whose precision involves the regularized Gram matrix
    of the selected columns.

    Parameters
    ----------
    theta_space : parameter-space object; only `grid` and `p` are used.
    g : prior scale hyperparameter for beta.
    eta : regularization factor applied to the Gram matrix (scaled by its
        largest eigenvalue) so that it stays positive definite.
    X : (n, N) array of discretized functional covariates.
    Y : (n,) array of scalar responses.
    names : RV names; names[0]=beta, names[1]=tau, names[-2]=alpha0,
        names[-1]=sigma2.
    names_aux : names_aux[0] labels the auxiliary flat (alpha0, log_sigma)
        block sampled jointly.
    mle_theta : optional parameter vector; if given, its first p entries
        become the prior center b0 for beta, otherwise b0 is random.

    Returns
    -------
    The constructed (unsampled) pm.Model.
    """
    n, N = X.shape
    grid = theta_space.grid
    p = theta_space.p
    # Prior center for beta: MLE values if available, random otherwise
    if mle_theta is not None:
        b0 = mle_theta[:p]
    else:
        b0 = g*rng.standard_normal(size=p)  # <-- Change if needed
    with pm.Model() as model:
        X_pm = pm.Data('X', X)
        # Flat (improper) joint prior on (alpha0, log_sigma): log-density 0
        alpha0_and_log_sigma = pm.DensityDist(
            names_aux[0], lambda x: 0, shape=(2,))
        alpha0 = pm.Deterministic(names[-2], alpha0_and_log_sigma[0])
        log_sigma = alpha0_and_log_sigma[1]
        # Sample log(sigma) so that sigma is positive by construction
        sigma = pm.math.exp(log_sigma)
        sigma2 = pm.Deterministic(names[-1], sigma**2)
        # Impact points: uniform on [0, 1], snapped to the nearest grid index
        tau = pm.Uniform(names[1], 0.0, 1.0, shape=(p,))
        idx = np.abs(grid - tau[:, np.newaxis]).argmin(1)
        X_tau = X_pm[:, idx]  # covariates evaluated at the impact points
        G_tau = pm.math.matrix_dot(X_tau.T, X_tau)
        G_tau = (G_tau + G_tau.T)/2.  # Enforce symmetry
        # Regularize with eta * (largest eigenvalue) * I to keep it
        # invertible/positive definite
        G_tau_reg = G_tau + eta * \
            tt.max(tt.nlinalg.eigh(G_tau)[0])*np.identity(p)

        def beta_lprior(x):
            # Log-density of the g-prior N(b0, g*sigma2*G_tau_reg^{-1}),
            # up to an additive constant
            b = x - b0
            return (0.5*pm.math.logdet(G_tau_reg)
                    - p*log_sigma
                    - pm.math.matrix_dot(b.T, G_tau_reg, b)/(2.*g*sigma2))

        beta = pm.DensityDist(names[0], beta_lprior, shape=(p,))
        expected_obs = alpha0 + pm.math.matrix_dot(X_tau, beta)
        y_obs = pm.Normal('y_obs', mu=expected_obs, sigma=sigma, observed=Y)
    return model
# -- Hyperparameters
burn = 0  # Posterior draws discarded from the start of each chain
thin = 1  # Keep every `thin`-th posterior draw
thin_pp = 5  # Extra thinning when averaging posterior predictive samples
n_samples_nuts = 1000  # Draws per chain for NUTS
tune_nuts = 1000  # Tuning (warm-up) iterations for NUTS
target_accept = 0.8  # NUTS target acceptance probability
n_samples_metropolis = 10000  # Draws per chain for Metropolis
tune_metropolis = 5000  # Tuning iterations for Metropolis
USE_NUTS = False  # If False, fall back to Metropolis sampling
# -- Run sampler
model = make_model(theta_space, g, eta, X, Y, theta_names,
                   theta_names_aux[:1], mle_theta_tr)

# Options shared by both samplers; the step method must be created inside
# the model context, so the branch stays inside `with model:`
common_kwargs = dict(cores=2, return_inferencedata=True)
with model:
    if USE_NUTS:
        idata_pymc = pm.sample(n_samples_nuts, tune=tune_nuts,
                               target_accept=target_accept,
                               **common_kwargs)
    else:
        idata_pymc = pm.sample(n_samples_metropolis, tune=tune_metropolis,
                               step=pm.Metropolis(), **common_kwargs)

# Discard `burn` initial draws and keep every `thin`-th one
idata_pymc = idata_pymc.sel(draw=slice(burn, None, thin))
Since the tuning iterations already serve as burn-in, we keep the whole trace. In addition, we could consider thinning the samples.
# Numerical summary of the posterior samples
utils.summary(idata_pymc, var_names=theta_names, labeller=theta_labeller)
# Trace and density plots for every parameter
az.plot_trace(idata_pymc, var_names=theta_names, labeller=theta_labeller)
print("Density and trace plot:")
# Marginal posteriors annotated with the mode, one panel per parameter
az.plot_posterior(
    idata_pymc, point_estimate='mode',
    var_names=theta_names,
    labeller=theta_labeller,
    textsize=20,
    grid=(NROWS(theta_ndim), NCOLS))
print("Marginal posterior distributions:")
# NOTE(review): the captions are printed after each plotting call; in a
# notebook the printed text still appears before the rendered figures.
# -- Generate and plot posterior predictive samples from X
with model:
    print("Generating posterior predictive samples...")
    pp = utils.generate_pp(idata_pymc, X, theta_names, rng=rng)
utils.pp_to_idata([pp], idata_pymc, ['y_star'], merge=True)

# Posterior predictive checks: density overlay (left) and distribution of
# the replicated test statistic (right)
pairs = {"y_obs": "y_star"}
fig, (ax_ppc, ax_stat) = plt.subplots(1, 2, figsize=(10, 4))
plt.suptitle(r"Posterior predictive checks on $X$")
utils.plot_ppc(idata_pymc, n_samples=500, ax=ax_ppc, data_pairs=pairs)
az.plot_bpv(idata_pymc, kind='t_stat', t_stat='mean', plot_mean=False,
            ax=ax_stat, bpv=False, data_pairs=pairs)
ax_stat.axvline(Y.mean(), ls="--", color="r", lw=2, label=r"$\bar Y$")
legend_handles = ax_stat.get_legend_handles_labels()[0]
legend_handles.append(Line2D([0], [0], label=r"Distribution of $\bar Y^*$"))
_ = ax_stat.legend(handles=legend_handles)

# Show Bayesian p-values
for stat_name, stat_fn in statistics:
    print(f"bpv [T={stat_name}]: {utils.bpv(pp, Y, stat_fn):.3f}")
# Autocorrelation plot per parameter, with all chains combined
az.plot_autocorr(idata_pymc, var_names=theta_names,
                 combined=True, grid=(NROWS(theta_ndim), NCOLS),
                 labeller=theta_labeller)
print("Combined autocorrelation times:")
print("Graphical model:")
# Last expression of the cell: rendered by the notebook as a graphviz diagram
pm.model_graph.model_to_graphviz(model)
First we take a look at the distribution of predictions on a previously unseen dataset.
# -- Generate and plot posterior predictive samples from X_test
model_test = make_model(theta_space, g, eta, X_test, Y_test, theta_names,
                        theta_names_aux[:1], mle_theta_tr)
with model_test:
    print("Generating posterior predictive on hold-out data...")
    pp_test = utils.generate_pp(idata_pymc, X_test, theta_names, rng=rng)
idata_pp_test = utils.pp_to_idata(
    [pp_test], idata_pymc, ['y_star'], y_obs=Y_test)

# Posterior predictive checks on the hold-out data
pairs = {"y_obs": "y_star"}
fig, (ax_ppc, ax_stat) = plt.subplots(1, 2, figsize=(10, 4))
plt.suptitle(r"Posterior predictive checks on $X_{test}$")
utils.plot_ppc(idata_pp_test, n_samples=500, ax=ax_ppc, data_pairs=pairs)
az.plot_bpv(idata_pp_test, kind='t_stat', t_stat='mean', plot_mean=False,
            ax=ax_stat, bpv=False, data_pairs=pairs)
ax_stat.axvline(Y_test.mean(), ls="--", color="r", lw=2,
                label=r"$\bar Y_{test}$")
legend_handles = ax_stat.get_legend_handles_labels()[0]
legend_handles.append(
    Line2D([0], [0], label=r"Distribution of $\bar Y_{test}^*$"))
_ = ax_stat.legend(handles=legend_handles)

# Show Bayesian p-values
for stat_name, stat_fn in statistics:
    print(f"bpv [T={stat_name}]: {utils.bpv(pp_test, Y_test, stat_fn):.3f}")
Next we look at the MSE when using several point estimates of the parameters, as well as the mean of the posterior predictive samples.
# -- Compute metrics using several point estimates
df_metrics_pymc = pd.DataFrame(columns=results_columns)

# Posterior mean estimate: average the (thinned) posterior predictive
# samples over chains and draws to get one prediction per test observation
Y_hat_pp = pp_test[:, ::thin_pp, :].mean(axis=(0, 1))
metrics_pp = utils.regression_metrics(Y_test, Y_hat_pp)
df_metrics_pymc.loc[len(df_metrics_pymc)] = [
    "pymc_posterior_mean", p_hat,
    metrics_pp["mse"], metrics_pp["rmse"], metrics_pp["r2"],
]

# Other point estimates of the posterior parameters
for pe in point_estimates:
    Y_hat_pe = utils.point_predict(X_test, idata_pymc, theta_names, pe)
    metrics_pe = utils.regression_metrics(Y_test, Y_hat_pe)
    df_metrics_pymc.loc[len(df_metrics_pymc)] = [
        "pymc_" + pe, p_hat,
        metrics_pe["mse"], metrics_pe["rmse"], metrics_pe["r2"],
    ]

df_metrics_pymc.sort_values(results_columns[-2], inplace=True)
df_metrics_pymc.style.hide_index()
# -- Test variable selection procedure
# Run the Bayesian variable selection once per point estimate and gather
# all partial result tables into a single DataFrame.
# FIX: DataFrame.append was deprecated in pandas 1.4 and removed in 2.0;
# collect the pieces and concatenate them instead (same result,
# version-proof, and avoids the quadratic repeated-copy pattern).
var_sel_parts = [pd.DataFrame(columns=results_columns)]
for pe in point_estimates:
    df_var_sel = bayesian_var_sel(
        idata_pymc, theta_space, theta_names, X_fd,
        Y, X_test_fd, Y_test, folds, prefix="pymc",
        point_est=pe)
    var_sel_parts.append(df_var_sel)
df_metrics_pymc_var_sel = pd.concat(var_sel_parts)
df_metrics_pymc_var_sel.sort_values(
    results_columns[-2], inplace=True)
df_metrics_pymc_var_sel.style.hide_index()
# -- Save
# Serialize the InferenceData to NetCDF; `_ =` suppresses the return value
# in the notebook output
_ = idata_pymc.to_netcdf("pymc-p-fixed.nc")
# -- Load
idata_pymc = az.from_netcdf("pymc-p-fixed.nc")
%load_ext watermark
%watermark -n -u -v -iv -w